# Echo chunk source code in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

library(readr)
library(dplyr)
library(broom)

# Base ZIP-level demographics, extended with the raw demographics table.
# The two files spell the key column differently ("Zipcode" vs "ZIPCODE").
nyc_demo <- read_csv("nyc_zip_demo.csv")
nyc_demo_more <- read_csv("raw_data/nyc_zip_demographics.csv")
nyc_demo <- nyc_demo %>%
  left_join(nyc_demo_more, by = c("Zipcode" = "ZIPCODE"))

# Attach the demographics to the coffee table. No `by` is supplied, so dplyr
# joins on every column name the two tables have in common.
coffee_zip <- read_csv("coffee_zip.csv")
df <- coffee_zip %>%
  left_join(nyc_demo) %>%
  # Women_pct is derived as the complement of Men_pct.
  mutate(Women_pct = 100 - Men_pct)
library(ggthemes)
library(ggplot2)
library(tidyr)

# Average rating against total income, with one colour and one dashed
# linear-model line per value of Name.
pc1 <- ggplot(df, aes(x = Total_Income, y = Avg_rating, color = Name)) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Total Income vs Average Rating for Coffee",
    x = "Total Income",
    y = "Average Rating for Coffee Shops",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc1
# Average rating against mean income, with one colour and one dashed
# linear-model line per value of Name.
pc2 <- ggplot(df, aes(x = Mean_Income, y = Avg_rating, color = Name)) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Mean Income vs Average Rating for Coffee",
    x = "Mean Income",
    y = "Average Rating for Coffee Shops",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc2
# Average rating against population, with one colour and one dashed
# linear-model line per value of Name.
pc3 <- ggplot(df, aes(x = Population, y = Avg_rating, color = Name)) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Population vs Average Rating for Coffee",
    x = "Population",
    y = "Average Rating for Coffee Shops",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc3
# Average rating against poverty percentage, with one colour and one dashed
# linear-model line per value of Name.
pc6 <- ggplot(df, aes(x = Poverty_pct, y = Avg_rating, color = Name)) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Poverty Percentage vs Average Rating for Coffee",
    x = "Poverty Percentage",
    y = "Average Rating for Coffee Shops",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    # This title is longer, hence the smaller size (12 instead of 14).
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc6
# Average rating against unemployment percentage, with one colour and one
# dashed linear-model line per value of Name.
pc7 <- ggplot(df, aes(x = Unemployment_pct, y = Avg_rating, color = Name)) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Unemployment Percentage vs Average Rating for Coffee",
    x = "Unemployment Percentage",
    y = "Average Rating for Coffee Shops",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    # This title is longer, hence the smaller size (12 instead of 14).
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc7
# Poverty percentage against unemployment percentage (no rating involved),
# with one colour and one dashed linear-model line per value of Name.
pc8 <- ggplot(df, aes(x = Unemployment_pct, y = Poverty_pct, color = Name)) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Unemployment Percentage vs Poverty Percentage",
    x = "Unemployment Percentage",
    y = "Poverty Percentage",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc8
# Recode the chain names to the codes 0 (Dunkin'), 1 (Starbucks) and
# 2 (Blue Bottle); any other value of Name is left as it is.
# Assumes Name is a character column (as read_csv would produce), so the
# codes are stored as the strings "0", "1" and "2".
coffee_lm <- df %>%
  mutate(
    Name = case_when(
      Name == "Dunkin'" ~ "0",
      Name == "Starbucks" ~ "1",
      Name == "Blue Bottle" ~ "2",
      TRUE ~ Name
    )
  )

# One data frame per chain, selected by the code assigned above.
coffee_dd <- filter(coffee_lm, Name == "0")
coffee_sb <- filter(coffee_lm, Name == "1")
coffee_bb <- filter(coffee_lm, Name == "2")

# write_csv(coffee_sb, "./cleaned_sb_only.csv", na = "NA" )
# write_csv(coffee_dd, "./cleaned_dd_only.csv", na = "NA" )
# Starbucks subset in long format: the Men_pct / Women_pct columns are stacked
# into `gender` (column name) and `gender_value` (its value).
# pivot_longer() replaces the superseded gather(); only the row order differs.
df_gather <- coffee_sb %>%
  pivot_longer(
    cols = c(Men_pct, Women_pct),
    names_to = "gender",
    values_to = "gender_value"
  )

# Average rating against gender percentage, one colour and one dashed
# linear-model line per gender column.
# The object keeps its original name `pc_race` even though it shows gender.
pc_race <- ggplot(
  df_gather,
  aes(x = gender_value, y = Avg_rating, color = gender)
) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Gender Percentage vs Average Rating for Starbucks",
    x = "Gender Percentage",
    y = "Average Rating for Starbucks",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc_race
# Dunkin' subset in long format: the Men_pct / Women_pct columns are stacked
# into `gender` (column name) and `gender_value` (its value).
# pivot_longer() replaces the superseded gather(); only the row order differs.
df_gather <- coffee_dd %>%
  pivot_longer(
    cols = c(Men_pct, Women_pct),
    names_to = "gender",
    values_to = "gender_value"
  )

# Average rating against gender percentage, one colour and one dashed
# linear-model line per gender column.
# The object keeps its original name `pc_race` even though it shows gender.
pc_race <- ggplot(
  df_gather,
  aes(x = gender_value, y = Avg_rating, color = gender)
) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Gender Percentage vs Average Rating for the Dunkin",
    x = "Gender Percentage",
    y = "Average Rating for the Dunkin",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc_race
# Blue Bottle subset in long format: the Men_pct / Women_pct columns are
# stacked into `gender` (column name) and `gender_value` (its value).
# pivot_longer() replaces the superseded gather(); only the row order differs.
df_gather <- coffee_bb %>%
  pivot_longer(
    cols = c(Men_pct, Women_pct),
    names_to = "gender",
    values_to = "gender_value"
  )

# Average rating against gender percentage, one colour and one dashed
# linear-model line per gender column.
# The object keeps its original name `pc_race` even though it shows gender.
pc_race <- ggplot(
  df_gather,
  aes(x = gender_value, y = Avg_rating, color = gender)
) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Gender Percentage vs Average Rating for Blue Bottle",
    x = "Gender Percentage",
    y = "Average Rating for Blue Bottle",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(values = c("#34afd1", "#f09a56", "#87dc97")) +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc_race
# Starbucks subset in long format: the four race-percentage columns are
# stacked into `race` (column name) and `race_value` (its value).
# pivot_longer() replaces the superseded gather(); only the row order differs.
df_gather <- coffee_sb %>%
  pivot_longer(
    cols = c(White_pct, Black_pct, Native_pct, Asian_pct),
    names_to = "race",
    values_to = "race_value"
  )

# Average rating against race percentage, one colour and one dashed
# linear-model line per race column.
pc_race <- ggplot(
  df_gather,
  aes(x = race_value, y = Avg_rating, color = race)
) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Race Percentage vs Average Rating for Starbucks",
    x = "Race Percentage",
    y = "Average Rating for Starbucks",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(
    values = c("#34afd1", "#f09a56", "#87dc97", "#ABA9A9")
  ) +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc_race
# Dunkin' subset in long format: the four race-percentage columns are
# stacked into `race` (column name) and `race_value` (its value).
# pivot_longer() replaces the superseded gather(); only the row order differs.
df_gather <- coffee_dd %>%
  pivot_longer(
    cols = c(White_pct, Black_pct, Native_pct, Asian_pct),
    names_to = "race",
    values_to = "race_value"
  )

# Average rating against race percentage, one colour and one dashed
# linear-model line per race column.
pc_race <- ggplot(
  df_gather,
  aes(x = race_value, y = Avg_rating, color = race)
) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Race Percentage vs Average Rating for the Dunkin",
    x = "Race Percentage",
    y = "Average Rating for the Dunkin",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(
    values = c("#34afd1", "#f09a56", "#87dc97", "#ABA9A9")
  ) +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc_race
# Blue Bottle subset in long format: the four race-percentage columns are
# stacked into `race` (column name) and `race_value` (its value).
# pivot_longer() replaces the superseded gather(); only the row order differs.
df_gather <- coffee_bb %>%
  pivot_longer(
    cols = c(White_pct, Black_pct, Native_pct, Asian_pct),
    names_to = "race",
    values_to = "race_value"
  )

# Average rating against race percentage, one colour and one dashed
# linear-model line per race column.
pc_race <- ggplot(
  df_gather,
  aes(x = race_value, y = Avg_rating, color = race)
) +
  # `radius` is not a geom_point() parameter; ggplot2 only warned and ignored
  # it, so dropping it leaves the rendered points unchanged.
  geom_point(alpha = 0.8) +
  # The method is named as a string so it still resolves to the fitting
  # function even if a variable called `lm` exists in the workspace.
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed") +
  labs(
    title = "Race Percentage vs Average Rating for Blue Bottle",
    x = "Race Percentage",
    y = "Average Rating for Blue Bottle",
    caption = "Data Source: Yelp"
  ) +
  theme_linedraw() +
  scale_color_manual(
    values = c("#34afd1", "#f09a56", "#87dc97", "#ABA9A9")
  ) +
  theme(
    plot.title = element_text(size = 12, face = "bold", hjust = 0.5),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  )
pc_race
library(jtools)

# Avg_rating ~ Total_Income, fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Total_Income, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Total_Income, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle: completes the sb / dd / bb trio that every other predictor in
# this script is fitted on (coffee_dd had been fitted a second time here).
fit <- lm(Avg_rating ~ Total_Income, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Mean_Income, fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Mean_Income, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Mean_Income, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Mean_Income, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Population, fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Population, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Population, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Population, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating against each gender percentage (Women_pct, then Men_pct),
# fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Women_pct, data = coffee_sb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Men_pct, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Women_pct, data = coffee_dd)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Men_pct, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Women_pct, data = coffee_bb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Men_pct, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating against each race percentage (White, Asian, Black, Native),
# one single-predictor model at a time, on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ White_pct, data = coffee_sb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Asian_pct, data = coffee_sb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Black_pct, data = coffee_sb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Native_pct, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ White_pct, data = coffee_dd)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Asian_pct, data = coffee_dd)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Black_pct, data = coffee_dd)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Native_pct, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ White_pct, data = coffee_bb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Asian_pct, data = coffee_bb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Black_pct, data = coffee_bb)
summary(fit)
summ(fit)

fit <- lm(Avg_rating ~ Native_pct, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Poverty_pct, fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Poverty_pct, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Poverty_pct, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Poverty_pct, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Unemployment_pct, fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Unemployment_pct, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Unemployment_pct, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Unemployment_pct, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Population * Mean_Income (both main effects plus their
# interaction), fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Population * Mean_Income, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Population * Mean_Income, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Population * Mean_Income, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Population + Mean_Income (additive, no interaction), fitted
# separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(Avg_rating ~ Population + Mean_Income, data = coffee_sb)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(Avg_rating ~ Population + Mean_Income, data = coffee_dd)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(Avg_rating ~ Population + Mean_Income, data = coffee_bb)
summary(fit)
summ(fit)
# Avg_rating ~ Population + Mean_Income + Avg_review_count (additive),
# fitted separately on each chain subset.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Starbucks
fit <- lm(
  Avg_rating ~ Population + Mean_Income + Avg_review_count,
  data = coffee_sb
)
summary(fit)
summ(fit)

# Dunkin'
fit <- lm(
  Avg_rating ~ Population + Mean_Income + Avg_review_count,
  data = coffee_dd
)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(
  Avg_rating ~ Population + Mean_Income + Avg_review_count,
  data = coffee_bb
)
summary(fit)
summ(fit)
# Avg_rating ~ Population * Mean_Income + Avg_review_count (interaction model
# plus review count), fitted separately on each chain subset. The subsets are
# visited in the original order: Dunkin', Blue Bottle, then Starbucks.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.

# Dunkin'
fit <- lm(
  Avg_rating ~ Population * Mean_Income + Avg_review_count,
  data = coffee_dd
)
summary(fit)
summ(fit)

# Blue Bottle
fit <- lm(
  Avg_rating ~ Population * Mean_Income + Avg_review_count,
  data = coffee_bb
)
summary(fit)
summ(fit)

# Starbucks
fit <- lm(
  Avg_rating ~ Population * Mean_Income + Avg_review_count,
  data = coffee_sb
)
summary(fit)
summ(fit)
# Full additive model on the Starbucks subset only: population, income,
# review count, and the gender, race, poverty and unemployment percentages.
# The model is stored as `fit` rather than `lm` so that stats::lm() is not
# masked by a model object of the same name.
fit <- lm(
  Avg_rating ~ Population + Mean_Income + Avg_review_count + Men_pct +
    White_pct + Asian_pct + Black_pct + Poverty_pct + Unemployment_pct,
  data = coffee_sb
)
summary(fit)
summ(fit)
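# NOTE(review): the two lines below look like web-page footer text rather than
# R code; they are kept as comments so that the script parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.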